from urllib.parse import urljoin

import lxml.etree as le
import pandas as pd
import requests

# Level-one page URL. It is also the base against which every extracted
# href is resolved, so relative links end up as complete URLs.
url = 'https://www.runoob.com/html/html-tutorial.html'

# XPath expression selecting the anchors to collect.
x = '//a[@target="_top"]'

# Fetch the page source. The timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() stops an HTTP error page from
# being scraped as if it were the real document.
response = requests.get(url, timeout=10)
response.raise_for_status()
content = response.content.decode('utf-8')

# Parse the HTML text into an element tree that can be queried with XPath.
contentx = le.HTML(content)

# Extract one {'titles', 'hrefs'} record per matching anchor.
rets = contentx.xpath(x)
result = []
for ret in rets:
    ret_href = ret.get('href')
    if not ret_href:
        # An anchor without an href has no link to record.
        continue
    # itertext() also picks up text nested in child elements; ret.text alone
    # is None when the anchor starts with a child tag.
    ret_title = ''.join(ret.itertext()).strip()
    # urljoin handles root-relative, page-relative and absolute hrefs alike,
    # and always yields a URL that carries a scheme.
    ret_href = urljoin(url, ret_href.strip())
    print(ret_title, ret_href)
    result.append({'titles': ret_title, 'hrefs': ret_href})

# Convert to a DataFrame. The columns are pinned so the sheet keeps its header
# row even when no anchors matched.
datas = pd.DataFrame(result, columns=['titles', 'hrefs'])

# Write the table to an Excel workbook.
datas.to_excel('./菜鸟教程左侧导航.xlsx')
